{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 9.2 案例实战 - AdaBoost信用卡精准营销模型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 9.2.2 模型搭建"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1.读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>月收入（元）</th>\n",
       "      <th>月消费（元）</th>\n",
       "      <th>性别</th>\n",
       "      <th>月消费/月收入</th>\n",
       "      <th>响应</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>30</td>\n",
       "      <td>7275</td>\n",
       "      <td>6062</td>\n",
       "      <td>0</td>\n",
       "      <td>0.833265</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>25</td>\n",
       "      <td>17739</td>\n",
       "      <td>13648</td>\n",
       "      <td>0</td>\n",
       "      <td>0.769378</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>29</td>\n",
       "      <td>25736</td>\n",
       "      <td>14311</td>\n",
       "      <td>0</td>\n",
       "      <td>0.556069</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23</td>\n",
       "      <td>14162</td>\n",
       "      <td>7596</td>\n",
       "      <td>0</td>\n",
       "      <td>0.536365</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>27</td>\n",
       "      <td>15563</td>\n",
       "      <td>12849</td>\n",
       "      <td>0</td>\n",
       "      <td>0.825612</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄  月收入（元）  月消费（元）  性别   月消费/月收入  响应\n",
       "0  30    7275    6062   0  0.833265   1\n",
       "1  25   17739   13648   0  0.769378   1\n",
       "2  29   25736   14311   0  0.556069   1\n",
       "3  23   14162    7596   0  0.536365   1\n",
       "4  27   15563   12849   0  0.825612   1"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel('信用卡精准营销模型.xlsx')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2.提取特征变量和目标变量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df.drop(columns='响应') \n",
    "y = df['响应']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3.划分训练集和测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "4.模型训练及搭建"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,\n",
       "                   n_estimators=50, random_state=123)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "clf = AdaBoostClassifier(random_state=123)\n",
    "clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 9.2.3 模型预测及评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 1 1 0 1 0 1 0 0 0 1 1 1 1 1 0 0 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0\n",
      " 1 1 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1\n",
      " 0 0 0 0 1 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1\n",
      " 0 1 0 1 0 0 0 1 0 0 0 1 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 1\n",
      " 0 1 0 1 0 0 0 0 0 1 1 0 1 0 1 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 1 0 1 0 1 1\n",
      " 0 0 1 0 0 0 0 0 0 0 1 1 0 1 1]\n"
     ]
    }
   ],
   "source": [
    "# 模型搭建完毕后，通过如下代码预测测试集数据：\n",
    "y_pred = clf.predict(X_test)\n",
    "print(y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>预测值</th>\n",
       "      <th>实际值</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   预测值  实际值\n",
       "0    1    1\n",
       "1    1    1\n",
       "2    1    1\n",
       "3    0    0\n",
       "4    1    1"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 通过和之前章节类似的代码，我们可以将预测值和实际值进行对比：\n",
    "a = pd.DataFrame()  # 创建一个空DataFrame \n",
    "a['预测值'] = list(y_pred)\n",
    "a['实际值'] = list(y_test)\n",
    "a.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.85\n"
     ]
    }
   ],
   "source": [
    "# 查看预测准确度\n",
    "from sklearn.metrics import accuracy_score\n",
    "score = accuracy_score(y_pred, y_test)\n",
    "print(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.19294615, 0.80705385],\n",
       "       [0.41359387, 0.58640613],\n",
       "       [0.42597039, 0.57402961],\n",
       "       [0.66817389, 0.33182611],\n",
       "       [0.32850159, 0.67149841]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看预测分类概率\n",
    "y_pred_proba = clf.predict_proba(X_test)\n",
    "y_pred_proba[0:5]  # 查看前5项，第一列为分类为0的概率，第二列为分类为1的概率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAS6UlEQVR4nO3df5Bd5X3f8fd3JSTxQ0KCXZAtLawgIkZQDGSDTZzWeMCuTBLUZBwbpUwch4Q6Me6M7WmHFg91iTvT2I09zQzFURxCnImDSaa1t0YZPHFwaAgCrWMBRlTOIn5oI2GtkRA/hFit9ts/7lpdVivdI+nuPbrPvl8zO3PPOY/ufh7d3Y+Ozj3nnshMJEmdr6vuAJKk1rDQJakQFrokFcJCl6RCWOiSVIi5dX3j7u7u7Ovrq+vbS1JH+u53v/ujzOyZbltthd7X18fg4GBd316SOlJEPHe4bR5ykaRCWOiSVAgLXZIKYaFLUiEsdEkqRNNCj4i7ImJnRHz/MNsjIn4/IoYi4vGIuLz1MSVJzVTZQ78bWH2E7e8HVk583QTcefyxJElHq+l56Jn5YET0HWHIGuAr2fgc3g0RsTgi3pKZO1qUUSeAA+PJHz/0DC+/vr/uKFLHu/rCs3l77+KWP28rLixaBmybtDw8se6QQo+Im2jsxXPOOee04FurXYZ2vspn73sKgIiaw0gd7qxFC07YQp/u13vau2Zk5jpgHUB/f7931jiBZCabd7zM3tED025/9kevAfClG36K1RcvbWc0SRW1otCHgd5Jy8uB7S14XrXRpm0v8Yv/4++bjjt1/pw2pJF0LFpR6APAzRFxD/AOYI/HzzvPa2809sw//XMX8rali6Ydc/K8Li7rXdLOWJKOQtNCj4g/B64CuiNiGPhPwEkAmfklYD1wLTAE7AU+MlNhNXOe3L4HgMvOWcxPnXtGzWkkHYsqZ7msbbI9gY+1LJHaanw8+fy3tnDnd57mZ84/k0uWt/6NGkntUdvH52rmZSafve8pnntx72HHjLz6Bo9te4m1V5zD7Wsu4qQ5XjwsdSoLvWD79o/zR3/3DGctnE/3afOnHdPVBZ/5hVV8+Gf6CM9HlDqahV6Il/ft55uP7WBsfPzgutGxxuNf/9kVfPTd59cVTVKbWOiFGNi0nU9/fdqP22HpogVtTiOpDhZ6IcYONPbGv/2pd7P45JMOrp/TFSw+ZV5dsSS1kYVemDNOmceSUy1waTbylAZJKoR76B3ov9y3mfVPvPCmda++MVZTGkknCgu9A/3d0ItkJlee3/2m9W9dvIDFp5x0mD8lqXQWeocY2vkKf/zQs4xnsmPP6/x03xn83gffXncsSScQC71DDDy2gz975HnOWjifeXO66D/XD8mS9GYWeg2+/097eH7X4S/Hn84//vAVIuDRW6+ZoVSSOp2FXoO1f7iBV/Yd/ZuYSzw+LukILPQZtnd0jLHxN9+cad/+A3yov5df/9kVR/VcPQun/zwWSQILfUY9/PSL/Osvb2B8mpvtnb1oPj+5dGH7Q0kqloU+g154+XXGE377qvM5Y9LVm10R/Pwlb6kxmaQSWegtNj6efPye7zG8+3V2vzYKwAf7e+nrPrXmZJJKZ6G32GujY9z3+A7O6zmVFd2n0n/uEpYtObnuWJJmAQu9RTY+u4sHfzBy8DPIf+WKc/iNf35ezakkzSYWeot84Vs/4OGtL9IVMG9uFys8xCKpzSz0FhnP5B0rzuBr/+bKuqNImqX8+FxJKoSFLkmF8JDLcXhgy04+9mf/wNh4sv/AOO9ccWbdkSTNYhb6cXh656vsHT3AR97Vx/y5c7jqJ3vqjiRpFrPQj9L+A+Pc+r+eYNdrozz3YuMTEz/x3gtYtMAPzpJULwv9KG1/6XXuHRxm2eKTWXzKSfzLi87mtHn+NUqqn01U0aZtL7Hp+d3smric/1Pvu4Bfunx5zakk6f+z0Cv6j//zCTbveBmACFi6aEHNiSTpzSz0IxgfT57btZfxTPaOjnH1287iv/3y25k7J1joMXNJJxgL/QjueugZPnvfUweXL+1dzJJJH4MrSSeSSoUeEauB/w7MAb6cmf91yvZzgD8BFk+MuSUz17c4a9vtem2UroAvfuhSAN7heeaSTmBNCz0i5gB3AO8FhoGNETGQmZsnDfs0cG9m3hkRq4D1QN8M5G27OV3BmkuX1R1Dkpqqcun/FcBQZm7NzFHgHmDNlDEJLJp4fDqwvXURJUlVVCn0ZcC2ScvDE+sm+wxwQ0QM09g7//h0TxQRN0XEYEQMjoyMHENcSdLhVCn0mGbd1NserwXuzszlwLXAn0bEIc+dmesysz8z+3t6vExeklqpypuiw0DvpOXlHHpI5UZgNUBmPhwRC4BuYGcrQrZTZvLgP/6IV/btZ2jnq3XHkaTKqhT6RmBlRKwA/gm4HviVKWOeB64G7o6IC4EFQEceU3lqxyt8+K5HDy53nza/xjSSVF3TQs/MsYi4GbifximJd2XmkxFxOzCYmQPAp4A/jIhP0Dgc82uZOfWwTEd4Y+wAAJ/9VxfzjhVn0LPQQpfUGSqdhz5xTvn6Ketum/R4M/Cu1kar17IlJ7Py7IV1x5CkyrxjkSQVwkKXpEJY6JJUiFn94Vw/+OErDGzaTk46rf6FPW/UmEiSjt2sLvS7//5ZvvrI88ztevO1UwsXzKV3yck1pZKkYzOrCz0zOWvhfB699Zq6o0jScfMYuiQVwkKXpEIUfcjlC9/awp1/+/Rht4+NJ2cv9N6gkspQdKH/3xdeYdGCk/jQT/cedswly09vYyJJmjlFFzpAz8L5/PvVb6s7hiTNOI+hS1IhLHRJKoSFLkmFKO4YemaybdfrjB44wGujY3XHkaS2Ka7Qv/3UTn7jK4MHly/tXVxjGklqn+IKfffeUQA+8wurOPO0+Vz01kU1J5Kk9iiu0H/s6gvPpveMU+qOIUlt45uiklQIC12SCmGhS1IhijmG/ugzu3jh5X1s2vZS3VEkqRZFFPqrb4zxoXUPkxN3kpvbFSxcUMTUJKmyjm698fHk5X372fP6fjLhY+85n1+8bDmnn3wSi0+ZV3c8SWqrji70T9y7iW9s2n5weenpJ/MTZ51WYyJJqk9HF/r2l15nRfep/OqV5zJ3ThfXXfLWuiNJUm06rtD/4fnd/Of/vZkD4+NsHXmNS3sX85F3rag7liTVruNOW9z4zC4e2/YSZ546nyvPO/OIdyOSpNmk4/bQf+zOGy7nlHkdG1+SWq7j9tAlSdOrVOgRsToitkTEUETccpgxH4yIzRHxZER8tbUxJUnNND1mERFzgDuA9wLDwMaIGMjMzZPGrAT+A/CuzNwdEWfNVGBJ0vSq7KFfAQxl5tbMHAXuAdZMGfObwB2ZuRsgM3e2NqYkqZkqhb4M2DZpeXhi3WQXABdExEMRsSEiVk/3RBFxU0QMRsTgyMjIsSWWJE2rSqHHNOtyyvJcYCVwFbAW+HJEHHLvt8xcl5n9mdnf09NztFklSUdQpdCHgckney8Htk8z5huZuT8znwG20Ch4SVKbVCn0jcDKiFgREfOA64GBKWO+DrwHICK6aRyC2drKoJKkI2ta6Jk5BtwM3A88BdybmU9GxO0Rcd3EsPuBFyNiM/AA8O8y88WZCi1JOlSlSy0zcz2wfsq62yY9TuCTE1+SpBp4pagkFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWoVOgRsToitkTEUETccoRxH4iIjIj+1kWUJFXRtNAjYg5wB/B+YBWwNiJWTTNuIfBvgUdaHVKS1FyVPfQrgKHM3JqZo8A9wJppxv0O8DlgXwvzSZIqqlLoy4Btk5aHJ9YdFBGXAb2Z+c0jPVFE3BQRgxExODIyctRhJUmHV6XQY5p1eXBjRBfwReBTzZ4oM9dlZn9m9vf09FRPKUlqqkqhDwO9k5aXA9snLS8ELga+ExHPAu8EBnxjVJLaq0qhbwRWRsSKiJgHXA8M/HhjZu7JzO7M7MvMPmADcF1mDs5IYknStJoWemaOATcD9wNPAfdm5pMRcXtEXDfTASVJ1cytMigz1wPrp6y77TBjrzr+WJKko+WVopJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQlQo9IlZHxJaIGIqIW6bZ/smI2BwRj0fEtyPi3NZHlSQdSdNCj4g5wB3A+4FVwNqIWDVl2PeA/sy8BPhL4HOtDipJOrIqe+hXAEOZuTUzR4F7gDWTB2TmA5m5d2JxA7C8tTElSc1UKfRlwLZJy8MT6w7nRuCvptsQETdFxGBEDI6MjFRPKUlqqkqhxzTrctqBETcA/cDnp9uemesysz8z+3t6eqqnlCQ1NbfCmGGgd9LycmD71EERcQ1wK/DuzHyjNfEkSVVV2UPfCKyMiBURMQ+4HhiYPCAiLgP+ALguM3e2PqYkqZmmhZ6ZY8DNwP3AU8C9mflkRNweEddNDPs8cBrwFxGxKSIGDvN0kqQZUuWQC5m5Hlg/Zd1tkx5f0+JckqSj5JWiklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVolKhR8TqiNgSEUMRccs02+dHxNcmtj8SEX2tDipJOrKmhR4Rc4A7gPcDq4C1EbFqyrAbgd2Z+RPAF4HfbXVQSdKRVdlDvwIYysytmTkK3AOsmTJmDfAnE4//Erg6IqJ1MSVJzVQp9GXAtknLwxPrph2TmWPAHuDMqU8UETdFxGBEDI6MjBxT4BXdp3LtP1tKl/9eSNKbzK0wZrrmzGMYQ2auA9YB9Pf3H7K9ivddtJT3XbT0WP6oJBWtyh76MNA7aXk5sP1wYyJiLnA6sKsVASVJ1VQp9I3AyohYERHzgOuBgSljBoAPTzz+APA3mXlMe+CSpGPT9JBLZo5FxM3A/cAc4K7MfDIibgcGM3MA+CPgTyNiiMae+fUzGVqSdKgqx9DJzPXA+inrbpv0eB/wy62NJkk6Gl4pKkmFsNAlqRAWuiQVwkKXpEJEXWcXRsQI8Nwx/vFu4EctjNMJnPPs4Jxnh+OZ87mZ2TPdhtoK/XhExGBm9tedo52c8+zgnGeHmZqzh1wkqRAWuiQVolMLfV3dAWrgnGcH5zw7zMicO/IYuiTpUJ26hy5JmsJCl6RCnNCFPhtvTl1hzp+MiM0R8XhEfDsizq0jZys1m/OkcR+IiIyIjj/FrcqcI+KDE6/1kxHx1XZnbLUKP9vnRMQDEfG9iZ/va+vI2SoRcVdE7IyI7x9me0TE70/8fTweEZcf9zfNzBPyi8ZH9T4NnAfMAx4DVk0Z89vAlyYeXw98re7cbZjze4BTJh7/1myY88S4hcCDwAagv+7cbXidVwLfA5ZMLJ9Vd+42zHkd8FsTj1cBz9ad+zjn/C+Ay4HvH2b7tcBf0bjj2zuBR473e57Ie+iz8ebUTeecmQ9k5t6JxQ007iDVyaq8zgC/A3wO2NfOcDOkypx/E7gjM3cDZObONmdstSpzTmDRxOPTOfTOaB0lMx/kyHduWwN8JRs2AIsj4i3H8z1P5EJv2c2pO0iVOU92I41/4TtZ0zlHxGVAb2Z+s53BZlCV1/kC4IKIeCgiNkTE6ralmxlV5vwZ4IaIGKZx/4WPtydabY72972pSje4qEnLbk7dQSrPJyJuAPqBd89oopl3xDlHRBfwReDX2hWoDaq8znNpHHa5isb/wv5PRFycmS/NcLaZUmXOa4G7M/P3IuJKGndBuzgzx2c+Xi1a3l8n8h76bLw5dZU5ExHXALcC12XmG23KNlOazXkhcDHwnYh4lsaxxoEOf2O06s/2NzJzf2Y+A2yhUfCdqsqcbwTuBcjMh4EFND7EqlSVft+Pxolc6LPx5tRN5zxx+OEPaJR5px9XhSZzzsw9mdmdmX2Z2UfjfYPrMnOwnrgtUeVn++s03gAnIrppHILZ2taUrVVlzs8DVwNExIU0Cn2krSnbawD41YmzXd4J7MnMHcf1jHW/E9zkXeJrgR/QeHf81ol1t9P4hYbGC/4XwBDwKHBe3ZnbMOe/Bn4IbJr4Gqg780zPecrY79DhZ7lUfJ0D+AKwGXgCuL7uzG2Y8yrgIRpnwGwC3ld35uOc758DO4D9NPbGbwQ+Cnx00mt8x8TfxxOt+Ln20n9JKsSJfMhFknQULHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUiP8H6DltT0KHS1EAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 绘制ROC曲线\n",
    "from sklearn.metrics import roc_curve\n",
    "fpr, tpr, thres = roc_curve(y_test.values, y_pred_proba[:,1])\n",
    "import matplotlib.pyplot as plt\n",
    "plt.plot(fpr, tpr)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9559047909673483\n"
     ]
    }
   ],
   "source": [
    "# 查看AUC值\n",
    "from sklearn.metrics import roc_auc_score\n",
    "score = roc_auc_score(y_test, y_pred_proba[:,1])\n",
    "print(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.18, 0.2 , 0.36, 0.02, 0.24])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看特征重要性\n",
    "clf.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>特征名称</th>\n",
       "      <th>特征重要性</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>月消费（元）</td>\n",
       "      <td>0.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>月消费/月收入</td>\n",
       "      <td>0.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>月收入（元）</td>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>年龄</td>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>性别</td>\n",
       "      <td>0.02</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      特征名称  特征重要性\n",
       "2   月消费（元）   0.36\n",
       "4  月消费/月收入   0.24\n",
       "1   月收入（元）   0.20\n",
       "0       年龄   0.18\n",
       "3       性别   0.02"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 通过DataFrame的方式展示特征重要性\n",
    "features = X.columns  # 获取特征名称\n",
    "importances = clf.feature_importances_  # 获取特征重要性\n",
    "\n",
    "# 通过二维表格形式显示\n",
    "importances_df = pd.DataFrame()\n",
    "importances_df['特征名称'] = features\n",
    "importances_df['特征重要性'] = importances\n",
    "importances_df.sort_values('特征重要性', ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "9.2.4 模型参数（选学）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 分类模型，通过如下代码可以查看官方介绍\n",
    "# from sklearn.ensemble import AdaBoostClassifier\n",
    "# AdaBoostClassifier?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 回归模型，通过如下代码可以查看官方介绍\n",
    "# from sklearn.ensemble import AdaBoostRegressor\n",
    "# AdaBoostRegressor?"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
